---
title: "Word polarity"
author: "Kohei Watanabe"
date: "`r format(Sys.time(), '%Y-%m-%d')`"
output: html_document
---

```{r message=FALSE}
# Default rendering options applied to every subsequent chunk.
knitr::opts_chunk$set(echo = TRUE, dpi = 150, fig.height = 5, fig.width = 10)

# Attach dependencies with library() so that a missing package stops the
# render right here; require() would only warn and return FALSE, deferring
# the failure to an unrelated "could not find function" error further down.
library(quanteda)
library(LSX)
library(ggplot2)
library(ggrepel)

# Inputs, given as relative paths:
# - `dict`: keyword dictionary; its `highlight` entry is used below.
# - `lss`: object whose `beta` and `frequency` elements are plotted below
#   (presumably a fitted LSX model -- confirm against the producing script).
dict <- dictionary(file = "keywords.yml")
lss <- readRDS("lss.RDS")
```

```{r fig.height=6}
# Words listed under the dictionary's `highlight` key get emphasised labels.
highlight <- unlist(dict$highlight)

# One row per word: its score (`beta`) and log-transformed frequency.
dat <- data.frame(
  word = names(lss$beta),
  beta = lss$beta,
  frequency = log(lss$frequency),
  stringsAsFactors = FALSE
)

# Partition the rows into highlighted words and the remaining background.
is_highlighted <- dat$word %in% highlight
dat_black <- dat[is_highlighted, , drop = FALSE]
dat_gray <- dat[!is_highlighted, , drop = FALSE]

# Black-and-white theme without grid lines, with extra space between each
# axis title and the panel.
axis_theme <- theme_bw() +
  theme(
    panel.grid = element_blank(),
    axis.title.x = element_text(margin = margin(t = 20)),
    axis.title.y = element_text(margin = margin(r = 20))
  )

# Base layer: every non-highlighted word drawn as a faint grey label.
gg <- ggplot(dat_gray, aes(x = beta, y = frequency, label = word)) +
  geom_text(colour = "grey70", alpha = 0.7) +
  labs(x = "Polarity score", y = "Log term-frequency") +
  axis_theme

# Overlay the highlighted words: repelled black labels first, then a small
# point marking each word's exact position.
gg +
  geom_text_repel(
    data = dat_black,
    mapping = aes(x = beta, y = frequency, label = word),
    segment.size = 0.25,
    colour = "black"
  ) +
  geom_point(
    data = dat_black,
    mapping = aes(x = beta, y = frequency),
    cex = 0.7,
    colour = "black"
  )

```

```{r}
# Print the highlighted words from the highest to the lowest `beta`.
polarity_rank <- order(dat_black$beta, decreasing = TRUE)
dat_black[polarity_rank, , drop = FALSE]
```

